Scatterplots of exposure, mediators, and outcome

Associations of exposure, mediators, and outcome

#excess covid deaths per 100,000 (in 2021) for every 10% increase in poverty
kable(cbind(reg_ye_2021.beta,reg_ye_2021.lcl,reg_ye_2021.ucl,reg_ye_2021.p/10)[2,]*10)
x
reg_ye_2021.beta 25.27894
reg_ye_2021.lcl 20.04640
reg_ye_2021.ucl 30.51149
0.00000
#reduction in covid deaths per 100,000 (in 2021) for every 10% increase in vaccination rate
kable(cbind(reg_ym_vax.beta,reg_ym_vax.lcl,reg_ym_vax.ucl,reg_ym_vax.p/10)[2,]*10)
x
reg_ym_vax.beta -7.150849
reg_ym_vax.lcl -9.528206
reg_ym_vax.ucl -4.773492
0.000000

US heatmap of exposure, mediators, and outcome

Basic characteristics

Characteristic N = 3,1421
Proportion aged 65+ years (%) 19.8 (4.8)
Proportion of males (%) 50.1 (2.3)
Proportion of Asians, Native Hawaiians, or other Pacific Islanders, non-Hispanic (%) 1.4 (2.9)
Proportion of Hispanics (%) 9.2 (13.7)
Proportion of non-Hispanic Whites (%) 76.2 (20.2)
(Missing) 1
Proportion of non-Hispanic Blacks (%) 8.9 (14.5)
Proportion of American Indians/Alaska Natives, non-Hispanic (%) 1.8 (7.6)
Proportion living in poverty (%) 14.5 (5.8)
(Missing) 1
Proportion without 4+ years of college education (%) 78.0 (9.6)
Proportion of smokers (%) 20.5 (4.0)
(Missing) 316
Proportion of adults with diabetes (%) 8.7 (1.6)
(Missing) 21
Proportion of obese adults (%) 36.0 (4.0)
(Missing) 316
Proportion of adults with chronic obstructive pulmonary disease (%) 7.4 (1.8)
(Missing) 21
Proportion of adults with high blood pressure (%) 32.7 (4.9)
(Missing) 21
Proportion of adults with coronary heart disease (%) 6.2 (1.0)
(Missing) 21
Proportion of adults with stroke (%) 3.4 (0.7)
(Missing) 21
Percent fully vaccinated by December 31st, 2021 (%) 52.4 (12.5)
(Missing) 16
COVID-19 deaths per 100,000 in April-December 2021 120.9 (71.8)
(Missing) 9

1 Mean (SD)

Mediation analysis of the SES-COVID-19 mortality association

Outcome: COVID-19 death rates in 2021

Mediators: vaccination rate up to December 31, 2021

#Variable selection
set.seed(123)
Y = "deaths_2021"
E = "pctPoverty"
M = c(
  "Series_Complete_Pop_Pct"
)
C = c("PropMale",
      "PropAbove65",
      "no.college.diploma",
      "native",
      "asian",
      "black",
      "white",
      "hispanic",
      "state"
) 

d = sdf[c("fips",Y,E,M,C)]

createfmla <- function(yvar, xvars) {
  rhs <- paste(xvars, collapse=" + ")
  return(as.formula(paste(yvar, "~", rhs, sep=" ")))
}
createint <- function(Y,E,M) {
  list <- NULL
  for(i in 1:length(M)){
    list <- c(list,paste(E,M[i], sep="*"))
  }
  rhs <- paste(list, collapse=" + ")
  return(as.formula(paste(Y, "~", rhs, sep=" ")))
}

set.seed(123)


#create dataset
d_l <- subset(d, state %in% c("AL","AK","AZ","CA","CO","CT","DE",
                              "FL","GA","HI","ID","IL","IN","DC", 
                              "KS","KY","LA","ME","MD","MA","MI",
                              "MN","MT","MS","MO","NV","NH","NJ",
                              "NM","NY","NC","OH","OK","OR","PA",
                              "RI","SC","TN","TX","VA","VT","WA",
                              "WV","WI", #lockdown states
                              "AR","IA","NE","ND","SD","UT","WY" #no lockdown states
                              ))

# max(d_l$Series_Complete_Pop_Pct,na.rm=TRUE)
# quantile(d_l$Series_Complete_Pop_Pct,0.90,na.rm=TRUE)
# quantile(d_l$Series_Complete_Pop_Pct,0.75,na.rm=TRUE)
# quantile(d_l$Series_Complete_Pop_Pct,0.50,na.rm=TRUE)

#impute with RF
set.seed(123)
d_l_mf <- d_l
d_l_mf$state <- factor(d_l_mf$state)
d_l_mf <- subset(d_l_mf, select=-c(fips))
dimp <- missForest(xmis = d_l_mf, ntree=100, maxiter = 10, parallelize = 'no')
# dimp$OOBerror
# dimp$error
d_l_rf<-cbind(d_l$fips, dimp$ximp)

createfmla <- function(yvar, xvars) {
  rhs <- paste(xvars, collapse=" + ")
  return(as.formula(paste(yvar, "~", rhs, sep=" ")))
}

set.seed(123)

#g-formula approach

#max values
mval.list <- list()

mval.list <- list(max(d_l$Series_Complete_Pop_Pct, na.rm=TRUE)) 
fit_l <- cmest(
  data=d_l_rf, model="gformula", 
  outcome = Y, exposure = E, mediator = M, basec = C, 
  yreg="linear", mreg=rep(list("linear"),length(M)),
  astar = median(d_l$pctPoverty, na.rm=TRUE), a = median(d_l$pctPoverty, na.rm=TRUE)+10, 
  EMint=TRUE,
  mval = mval.list,
  estimation = "imputation", multimp = FALSE,
  inference = "bootstrap", nboot = 1000)
res_mediation_max_vax <- data.frame(
  "Statistic" = c("Total effect", "Controlled direct effect", "Proportion eliminated"),
  "Effect" = 
    round(c(summary(fit_l)$effect.pe["te"],summary(fit_l)$effect.pe["cde"],summary(fit_l)$effect.pe["pe"]),2),
  "Lower CL" = 
    round(c(summary(fit_l)$effect.ci.low["te"],summary(fit_l)$effect.ci.low["cde"],summary(fit_l)$effect.ci.low["pe"]),2),
  "Upper CL" = 
    round(c(summary(fit_l)$effect.ci.high["te"],summary(fit_l)$effect.ci.high["cde"],summary(fit_l)$effect.ci.high["pe"]),2),
  "P-value" = 
    round(c(summary(fit_l)$effect.pval["te"],summary(fit_l)$effect.pval["cde"],summary(fit_l)$effect.pval["pe"]),3)
)

res_mediation_max_vax %>% 
  gt() %>%
  cols_label(
    Statistic = " ",
    Lower.CL = "Lower CL",
    Upper.CL = "Upper CL",
    P.value = "P-value"
  ) %>% gt::gtsave(          
    filename = sprintf("%s/tables/mediation_max_vax.html",rootdir))

#90th percentile
mval.list <- list()
mval.list <- list(quantile(d_l$Series_Complete_Pop_Pct,0.90,na.rm=TRUE))
fit_l <- cmest(
  data=d_l_rf, model="gformula", 
  outcome = Y, exposure = E, mediator = M, basec = C, 
  yreg="linear", mreg=rep(list("linear"),length(M)),
  astar = median(d_l$pctPoverty, na.rm=TRUE), a = median(d_l$pctPoverty, na.rm=TRUE)+10, 
  EMint=TRUE,
  mval = mval.list,
  estimation = "imputation", multimp = FALSE,
  inference = "bootstrap", nboot = 1000)
res_mediation_0.9_vax <- data.frame(
  "Statistic" = c("Total effect", "Controlled direct effect", "Proportion eliminated"),
  "Effect" = 
    round(c(summary(fit_l)$effect.pe["te"],summary(fit_l)$effect.pe["cde"],summary(fit_l)$effect.pe["pe"]),2),
  "Lower CL" = 
    round(c(summary(fit_l)$effect.ci.low["te"],summary(fit_l)$effect.ci.low["cde"],summary(fit_l)$effect.ci.low["pe"]),2),
  "Upper CL" = 
    round(c(summary(fit_l)$effect.ci.high["te"],summary(fit_l)$effect.ci.high["cde"],summary(fit_l)$effect.ci.high["pe"]),2),
  "P-value" = 
    round(c(summary(fit_l)$effect.pval["te"],summary(fit_l)$effect.pval["cde"],summary(fit_l)$effect.pval["pe"]),3)
)

res_mediation_0.9_vax %>% 
  gt() %>%
  cols_label(
    Statistic = " ",
    Lower.CL = "Lower CL",
    Upper.CL = "Upper CL",
    P.value = "P-value"
  ) %>% gt::gtsave(          
    filename = sprintf("%s/tables/mediation_0.9_vax.html",rootdir))

#75th percentile
mval.list <- list()
mval.list <- list(quantile(d_l$Series_Complete_Pop_Pct,0.75,na.rm=TRUE))
fit_l <- cmest(
  data=d_l_rf, model="gformula", 
  outcome = Y, exposure = E, mediator = M, basec = C, 
  yreg="linear", mreg=rep(list("linear"),length(M)),
  astar = median(d_l$pctPoverty, na.rm=TRUE), a = median(d_l$pctPoverty, na.rm=TRUE)+10, 
  EMint=TRUE,
  mval = mval.list,
  estimation = "imputation", multimp = FALSE,
  inference = "bootstrap", nboot = 1000)
res_mediation_0.75_vax <- data.frame(
  "Statistic" = c("Total effect", "Controlled direct effect", "Proportion eliminated"),
  "Effect" = 
    round(c(summary(fit_l)$effect.pe["te"],summary(fit_l)$effect.pe["cde"],summary(fit_l)$effect.pe["pe"]),2),
  "Lower CL" = 
    round(c(summary(fit_l)$effect.ci.low["te"],summary(fit_l)$effect.ci.low["cde"],summary(fit_l)$effect.ci.low["pe"]),2),
  "Upper CL" = 
    round(c(summary(fit_l)$effect.ci.high["te"],summary(fit_l)$effect.ci.high["cde"],summary(fit_l)$effect.ci.high["pe"]),2),
  "P-value" = 
    round(c(summary(fit_l)$effect.pval["te"],summary(fit_l)$effect.pval["cde"],summary(fit_l)$effect.pval["pe"]),3)
)

res_mediation_0.75_vax %>% 
  gt() %>%
  cols_label(
    Statistic = " ",
    Lower.CL = "Lower CL",
    Upper.CL = "Upper CL",
    P.value = "P-value"
  ) %>% gt::gtsave(          
    filename = sprintf("%s/tables/mediation_0.75_vax.html",rootdir))
#Vaccination rate set to max
knitr::kable(res_mediation_max_vax)
Statistic Effect Lower.CL Upper.CL P.value
te Total effect 25.43 20.29 30.51 0.00
cde Controlled direct effect 4.99 -6.87 17.56 0.39
pe Proportion eliminated 0.80 0.34 1.26 0.00
#Vaccination rate set to 90th percentile
knitr::kable(res_mediation_0.9_vax)
Statistic Effect Lower.CL Upper.CL P.value
te Total effect 25.40 20.49 31.07 0
cde Controlled direct effect 15.94 10.10 21.74 0
pe Proportion eliminated 0.37 0.19 0.57 0
#Vaccination rate set to 75th percentile
knitr::kable(res_mediation_0.75_vax)
Statistic Effect Lower.CL Upper.CL P.value
te Total effect 25.39 20.30 31.81 0
cde Controlled direct effect 19.95 14.96 25.80 0
pe Proportion eliminated 0.21 0.11 0.32 0

How much of the SES-COVID association is shifted southwest by setting vaccinations to the maximum observed value?

PE and risk reduction in COVID-19 mortality for different values of the vaccination rate

Does increasing the vaccination rate increase PE of the association between SES and COVID-19 mortality (attenuate health inequity) and reduce average COVID-19 mortality (improve population health)?

Outcome: COVID-19 death rates in 2021

Mediator: vaccination rate up to December 31, 2021

###for each vax, obtain PE and risk reduction in COVID-19 deaths
#Variable selection
set.seed(123)
Y = "deaths_2021"
E = "pctPoverty"
M = c(
    "Series_Complete_Pop_Pct"
    )
C = c("PropMale",
    "PropAbove65",
    "no.college.diploma",
    "native",
    "asian",
    "black",
    "white",
    "hispanic",
    "state"
    ) 

d = sdf[c("fips",Y,E,M,C)]


createfmla <- function(yvar, xvars) {
  rhs <- paste(xvars, collapse=" + ")
  return(as.formula(paste(yvar, "~", rhs, sep=" ")))
}
createint <- function(Y,E,M) {
  list <- NULL
  for(i in 1:length(M)){
    list <- c(list,paste(E,M[i], sep="*"))
    }
  rhs <- paste(list, collapse=" + ")
  return(as.formula(paste(Y, "~", rhs, sep=" ")))
}

set.seed(123)


#create dataset
d_l <- subset(d, state %in% c("AL","AK","AZ","CA","CO","CT","DE",
                              "FL","GA","HI","ID","IL","IN","DC", 
                              "KS","KY","LA","ME","MD","MA","MI",
                              "MN","MT","MS","MO","NV","NH","NJ",
                              "NM","NY","NC","OH","OK","OR","PA",
                              "RI","SC","TN","TX","VA","VT","WA",
                              "WV","WI", #lockdown states
                              "AR","IA","NE","ND","SD","UT","WY" #no lockdown states
                              ))

#impute with RF
set.seed(123)
d_l_mf <- d_l
d_l_mf$state <- factor(d_l_mf$state)
d_l_mf <- subset(d_l_mf, select=-c(fips))
dimp <- missForest(xmis = d_l_mf, ntree=100, maxiter = 10, parallelize = 'no')
# dimp$OOBerror
# dimp$error
d_l_rf<-cbind(d_l$fips, dimp$ximp)

vax <- NULL
pe <- NULL
rd <- NULL
median_vax = median(d_l$Series_Complete_Pop_Pct, na.rm=TRUE)
max_vax = max(d_l$Series_Complete_Pop_Pct, na.rm=TRUE)
plot_vax <- NULL
n <- 1

for (j in seq(median_vax,max_vax,by=1)){ 
    mval.list_combined <- list()
    mval.list_combined <- list(j) 
    fit_l_combined <- cmest(
      data=d_l_rf, model="gformula", 
      outcome = Y, exposure = E, mediator = M, basec = C, 
      yreg="linear", mreg=rep(list("linear"),length(M)),
      astar = median(d_l$pctPoverty, na.rm=TRUE), a = median(d_l$pctPoverty, na.rm=TRUE)+10, 
      EMint=TRUE,
      mval = mval.list_combined,
      estimation = "imputation", multimp = FALSE,
      inference = "bootstrap", nboot = 1)
    vax[n] = j
    pe[n] = summary(fit_l_combined)[9]$effect.pe["pe"] * 100
        
    #create regression with E, M, C
    reg = glm(deaths_2021 ~ Series_Complete_Pop_Pct + pctPoverty + pctPoverty*Series_Complete_Pop_Pct + 
                PropMale + 
                PropAbove65 +
                no.college.diploma +
                native +
                asian +
                black +
                white +
                hispanic +
                state,
              family = gaussian(link="identity"), data = d_l_rf)
    
    d_l_mmax <- d_l_rf
    d_l_mmax$Series_Complete_Pop_Pct <- j
    
    #risk reduction (how many fewer deaths per 100,000 population with the specified mediator values?)
    rd[n] = -(mean(predict(reg, newdata=d_l_mmax)) - mean(d_l_mmax$deaths_2021))
    
    n <- n+1
}

#max risk reduction
-(mean(predict(reg, newdata=d_l_mmax)) - mean(d_l_mmax$deaths_2021))
t.test(d_l_mmax$deaths_2021, predict(reg, newdata=d_l_mmax), paired=TRUE)

plot_vax <- data.frame(pe, rd, vax)

fig_vax = ggplot(data = plot_vax, aes(x = pe,y = rd, color = vax)) + 
  geom_point(size = 3) +
  scale_x_continuous("Proportion eliminated of the poverty-COVID-19 death rate association (%)") +  
  scale_y_continuous("Average reduction in COVID-19 deaths per 100,000") +
  scale_color_gradient(low="blue", high="red") +
  labs(color="Vaccination \nrate (%)") +
  theme(legend.position = c(0.8, 0.3),legend.background = element_rect(fill="white",linetype="solid", color ="black")) 

#Variable selection
set.seed(123)
Y = "deaths_2021"
E = "pctPoverty"
M = c(
    "Series_Complete_Pop_Pct"
    )
C = c("PropMale",
    "PropAbove65",
    "no.college.diploma",
    "native",
    "asian",
    "black",
    "white",
    "hispanic",
    "state"
    ) 

d = sdf[c("fips",Y,E,M,C)]


createfmla <- function(yvar, xvars) {
  rhs <- paste(xvars, collapse=" + ")
  return(as.formula(paste(yvar, "~", rhs, sep=" ")))
}
createint <- function(Y,E,M) {
  list <- NULL
  for(i in 1:length(M)){
    list <- c(list,paste(E,M[i], sep="*"))
    }
  rhs <- paste(list, collapse=" + ")
  return(as.formula(paste(Y, "~", rhs, sep=" ")))
}

set.seed(123)


#create dataset
d_l <- subset(d, state %in% c("AL","AK","AZ","CA","CO","CT","DE",
                              "FL","GA","HI","ID","IL","IN","DC", 
                              "KS","KY","LA","ME","MD","MA","MI",
                              "MN","MT","MS","MO","NV","NH","NJ",
                              "NM","NY","NC","OH","OK","OR","PA",
                              "RI","SC","TN","TX","VA","VT","WA",
                              "WV","WI", #lockdown states
                              "AR","IA","NE","ND","SD","UT","WY" #no lockdown states
                              ))

#impute with RF
set.seed(123)
d_l_mf <- d_l
d_l_mf$state <- factor(d_l_mf$state)
d_l_mf <- subset(d_l_mf, select=-c(fips))
dimp <- missForest(xmis = d_l_mf, ntree=100, maxiter = 10, parallelize = 'no')
# dimp$OOBerror
# dimp$error
d_l_rf<-cbind(d_l$fips, dimp$ximp)

#the original association
reg = glm(createfmla(Y, E), family = gaussian(link="identity"), data = d_l_rf)
summary(reg)

#create regression with E, M, C, and predict Y with max vax
#with E-M interaction
reg = glm(deaths_2021 ~ pctPoverty + Series_Complete_Pop_Pct + pctPoverty*Series_Complete_Pop_Pct + 
            PropMale + 
            PropAbove65 +
            no.college.diploma +
            native +
            asian +
            black +
            white +
            hispanic +
            state,
          family = gaussian(link="identity"), data = d_l_rf)
d_l_mmax <- d_l_rf
d_l_mmax$Series_Complete_Pop_Pct <- max(d_l$Series_Complete_Pop_Pct,na.rm=TRUE)
d_l_mmax[Y] <- predict(reg, newdata=d_l_mmax)

#Regress with predicted Y
reg = glm(createfmla(Y, E), family = gaussian(link="identity"), data = d_l_mmax)
summary(reg)

#add to scatterplot
fig  <-  ggpubr::ggscatter(data=d_l, x="pctPoverty",y="deaths_2021", fill="grey45", color="grey45",
                            xlab = "Poverty rate (%)", ylab = "COVID-19 deaths per 100,000",
                            add = "reg.line", #conf.int = TRUE, 
                            #cor.coef = TRUE, 
                            cor.method = "pearson",
                            add.params = list(color = "Black",fill = "Black") 
) 
scatterplot_vax = 
  fig + 
    geom_segment(aes(x = min(d_l$pctPoverty, na.rm=TRUE), xend = max(d_l$pctPoverty, na.rm=TRUE), 
                     y = coef(reg)["(Intercept)"] + coef(reg)["pctPoverty"]*min(d_l$pctPoverty, na.rm=TRUE), 
                     yend = coef(reg)["(Intercept)"] + coef(reg)["pctPoverty"]*max(d_l$pctPoverty, na.rm=TRUE)),
                 color="red", lwd=1) + 
    xlim(min(d_l$pctPoverty, na.rm=TRUE), max(d_l$pctPoverty, na.rm=TRUE)) +
    annotate(geom="text", x=45, y=110, label="Vaccination rate \nmaximized", color="red") +
    annotate(geom="text", x=45, y=290, label="Observed \nvaccination rate", color="Black")


pdf(sprintf("%s/figures/southwest_vax.pdf",rootdir), width=6, height=10)
ggarrange(fig_vax, scatterplot_vax, ncol = 1, labels = c("A", "B"))
## `geom_smooth()` using formula 'y ~ x'
dev.off()

ggarrange(fig_vax, scatterplot_vax, ncol = 1, labels = c("A", "B"))
## `geom_smooth()` using formula 'y ~ x'